1 Load Libraries Used

library(jsonlite)
library(tidyverse)
library(ggplot2)
library(rworldmap)
library(soccermatics)
library(dplyr)
library(fmsb)
library(plotly)
library(lubridate)
library(networkD3)
library(ggrepel)
library(htmltools)
library(htmlwidgets)
library(scales)

2 Data Loading and Cleaning (“Statsbomb/Open-Data” 2021)

matchDetails <- fromJSON("ChampionsLeagueFinalMatch.json", flatten = TRUE)  #Import json file and convert it to a dataframe
matchEvents <- fromJSON("ChampionsLeagueFinalEvents.json", flatten = TRUE)  #Import json file and convert it to a dataframe
matchLineUp <- fromJSON("ChampionsLeagueFinalLineup.json", flatten = TRUE)  #Import json file and convert it to a dataframe

# Converted England and Scotland to UK because the map package groups them
# together under UK
matchLineUp[[3]][[2]]["country.name"][matchLineUp[[3]][[2]]["country.name"] == "England"] = "UK"
matchLineUp[[3]][[2]]["country.name"][matchLineUp[[3]][[2]]["country.name"] == "Scotland"] = "UK"

# Converted England to UK and Korea (South) to south korea
matchLineUp[[3]][[1]]["country.name"][matchLineUp[[3]][[1]]["country.name"] == "England"] = "UK"
matchLineUp[[3]][[1]]["country.name"][matchLineUp[[3]][[1]]["country.id"] == "121"] = "South Korea"

# Clean all location coordinates by splitting the X,Y,Z axis into separate
# fields. Event Location
for (i in 1:nrow(matchEvents)) {
    if (is.numeric(matchEvents[[i, 10]])) {
        matchEvents$location.x[i] = matchEvents[[i, 10]][1]
        matchEvents$location.y[i] = matchEvents[[i, 10]][2]
    } else {
        matchEvents$location.x[i] = NA
        matchEvents$location.y[i] = NA
    }
}
# carry end location
for (i in 1:nrow(matchEvents)) {
    if (is.numeric(matchEvents[[i, 56]])) {
        matchEvents$carry.end_location.x[i] = matchEvents[[i, 56]][1]
        matchEvents$carry.end_location.y[i] = matchEvents[[i, 56]][2]
    } else {
        matchEvents$carry.end_location.x[i] = NA
        matchEvents$carry.end_location.y[i] = NA
    }
}
# pass end location
for (i in 1:nrow(matchEvents)) {
    if (is.numeric(matchEvents[[i, 31]])) {
        matchEvents$pass.end_location.x[i] = matchEvents[[i, 31]][1]
        matchEvents$pass.end_location.y[i] = matchEvents[[i, 31]][2]
    } else {
        matchEvents$pass.end_location.x[i] = NA
        matchEvents$pass.end_location.y[i] = NA
    }
}
# shot end location
for (i in 1:nrow(matchEvents)) {
    if (is.numeric(matchEvents[[i, 69]])) {
        matchEvents$shot.end_location.x[i] = matchEvents[[i, 69]][1]
        matchEvents$shot.end_location.y[i] = matchEvents[[i, 69]][2]
        matchEvents$shot.end_location.z[i] = matchEvents[[i, 69]][3]
    } else {
        matchEvents$shot.end_location.x[i] = NA
        matchEvents$shot.end_location.y[i] = NA
        matchEvents$shot.end_location.z[i] = NA
    }
}

3 What are the nationalities of Liverpool players? (“Rworldmap Package R Documentation” n.d.)

#Show a bubbble map of the player's nationality count
matchLineUp[[3]][[2]] %>% count(country.name) %>%
  joinCountryData2Map(joinCode = "NAME",
                      nameJoinColumn = "country.name") %>%
  mapBubbles( nameZSize="n" 
              , nameZColour="country.name"
              ,legendTitle ="No of Players"
              #,colourPalette='topo', #rainbow
              ,oceanCol='lightblue', landCol='white'
              ,colourPalette=adjustcolor(rwmGetColours("palette", numColours=7), alpha.f = 0.8)) 
title(main="Liverpool Players Nationality")   

par(mai=c(0.3,0.2,0.2,0),xaxs="i",yaxs="i")
#Show a bubbble map of the player's nationality count hignlighting just european countries
matchLineUp[[3]][[2]] %>% count(country.name) %>%
  joinCountryData2Map(joinCode = "NAME",
                      nameJoinColumn = "country.name") %>%
  mapBubbles( nameZSize="n"
              , nameZColour="country.name"
              ,mapRegion = "eurasia" #where I specified the map to show just european countries
             ,legendTitle ="No of Players"
              ,oceanCol='lightblue', landCol='white'
             ,colourPalette=adjustcolor(rwmGetColours("palette", numColours=7), alpha.f = 0.8)) 
title(main="Liverpool Players Nationality showing the European Countries") 

4 What are the nationalities of Tottenham players?

#Show a bubbble map of the player's nationality count
matchLineUp[[3]][[1]] %>% count(country.name) %>%
  joinCountryData2Map(joinCode = "NAME",
                      nameJoinColumn = "country.name") %>%
  mapBubbles( nameZSize="n"
              , nameZColour="country.name"
              ,legendTitle ="No of Players"
              #colourPalette='topo', #rainbow
              ,oceanCol='lightblue', landCol='white'
              ,colourPalette=adjustcolor(rwmGetColours("palette", numColours=7), alpha.f = 0.8) )
title(main="Tottenham Players Nationality ") 

par(mai=c(0.3,0.2,0.2,0),xaxs="i",yaxs="i")
#Show a bubbble map of the player's nationality count hignlighting just european countries
matchLineUp[[3]][[1]] %>% count(country.name) %>%
  joinCountryData2Map(joinCode = "NAME",
                      nameJoinColumn = "country.name") %>%
  mapBubbles( nameZSize="n"
              , nameZColour="country.name"
              ,mapRegion = "eurasia" #where I specified the map to show just european countries
              ,legendTitle ="No of Players"
              ,oceanCol='lightblue', landCol='white'
              ,colourPalette=adjustcolor(rwmGetColours("palette", numColours=7), alpha.f = 0.8)) 
title(main="Tottenham Players Nationality showing the European Countries") 

5 What are the nationalities of all the players? (“Rworldmap Package R Documentation” n.d.)

# show a barplot of all player's nationality
matchLineUp[[3]][[1]] %>% bind_rows(matchLineUp[[3]][[2]]) %>% count(country.name) %>% 
    barplotCountryData(nameColumnToPlot = "n", nameCountryColumn = "country.name", 
        numPanels = 1, main = "Players Nationality Count in Final", catMethod = "categorical", 
        colourPalette = adjustcolor(rwmGetColours("palette", numColours = 5), alpha.f = 0.8))

6 How many events per Team do we have in the dataset?

 ggplotly(ggplot(matchEvents, aes(x=type.name,fill=type.name)) +
   geom_bar()+
   facet_wrap(~team.name) + # shows two output for each team
   theme(axis.text.x = element_text(angle=90)) + # rotates the label in the x axis
   labs(#subtitle="", 
     y="Number of events", 
     x="Event Name", 
     title="Bar Chart of match events per Team" 
   )
 )

7 What pattern of play was used by each team?

 ggplotly(ggplot(matchEvents, aes(x=play_pattern.name,fill=play_pattern.name)) +
   geom_bar()+
   facet_wrap(~team.name) + # shows two output for each team
   theme(axis.text.x = element_text(angle=90)) + # rotates the label in the x axis
   labs(#subtitle="", 
     y="Number of events", 
     x="Play Pattern Name", 
     title="Bar Chart of Play pattern per Team" 
   )
 )

8 What body part was used for passes made by each team?

 ggplotly(
   matchEvents %>%
     filter(type.name=="Pass") %>%
     ggplot(aes(x=pass.height.name,fill=pass.body_part.name)) +
     geom_bar()+
     facet_wrap(~team.name) +
     theme(axis.text.x = element_text(angle=90)) + # rotates the label in the x axis
     labs(#subtitle="", 
       y="Number of events", 
       x="Pass Type", 
       title="Bar Chart of pass type by body part per team " 
     )
 )

9 What resulted in the shots made by each team?

 ggplotly(
   matchEvents %>%
     filter(type.name=="Shot") %>% # filter for only shots
     ggplot(aes(x=type.name,fill=shot.type.name)) +
     geom_bar()+
     facet_wrap(~team.name) +
     theme(axis.text.x = element_text(angle=90)) + # rotates the label in the x axis
     labs(#subtitle="", 
       y="Number of events", 
       x="Play Pattern Name", 
       title="Bar Chart of match events per each play pattern for each team" 
     )
 )

10 How good were both teams in Dribbling?

 ggplotly(
   matchEvents %>%
     filter(type.name=="Dribble") %>%
     ggplot(aes(x=type.name,fill=dribble.outcome.name)) +
     geom_bar()+
     facet_wrap(~team.name) +
     theme(axis.text.x = element_text(angle=90)) +
     labs(#subtitle="", 
       y="Number of Dribles", 
       x="Dribbles", 
       title="Bar Chart of Dribbles for each team" 
     )
 )

11 Which team had possession in each period of the 1st Half?

ggplotly(matchEvents %>%
  filter(period==1) %>% # filter for just the first half
  mutate(timestamp=parse_date_time(timestamp, orders = "HMS")) %>% #convert the timestamp field to a date_time class
  ggplot(aes(x = timestamp, y = possession_team.id)) +
  geom_line(color = "grey", 
            size=0.5 ) +
  scale_x_datetime(date_breaks = "5 min",labels = date_format("%H:%M:%S")) + # Use the hour,minute and second as the label for the x axis
  labs(title = "Ball Possession between both teams in the 1st Half",
       subtitle = "",
       x = "Time",
       y = "Teams") +
  theme_minimal() +
  scale_y_continuous(breaks = c(24,38), 
                     labels=c("Liverpool","Tottenham")) + 
  facet_wrap(~period) +
  theme(axis.text.x = element_text(angle=90)) 
)

12 Which team had possession in each period of the 2nd Half?

ggplotly(matchEvents %>%
  filter(period==2) %>% # filter for just the first half
  mutate(timestamp=parse_date_time(timestamp, orders = "HMS")) %>% #convert the timestamp field to a date_time class
  ggplot(aes(x = timestamp, y = possession_team.id)) +
  geom_line(color = "grey", 
            size=0.5 ) +
  scale_x_datetime(date_breaks = "5 min",labels = date_format("%H:%M:%S")) + # Use the hour,minute and second as the label for the x axis
  labs(title = "Ball Possession between both teams in the 2nd Half",
       subtitle = "",
       x = "Time",
       y = "Teams") +
  theme_minimal() +
  scale_y_continuous(breaks = c(24,38), 
                     labels=c("Liverpool","Tottenham")) + 
  facet_wrap(~period) +
  theme(axis.text.x = element_text(angle=90)) 
)

13 At what part of the field did Liverpool players make their shots? (Gallagher 2021)

liverpoolShots<-matchEvents %>%
  filter(team.name == "Liverpool",type.name=="Shot")
ggplotly(
soccerPitch(lengthPitch=120,widthPitch=80,arrow = "r",theme="grass",
            title = "", 
            subtitle = "") +
  geom_point(data = liverpoolShots, aes(x = location.x, y = location.y,fill=shot.outcome.name),size=4,shape = 21) + #where I included the coorinates for the shot 
  scale_y_reverse() + #reverses the y axis of the pitch as it was inverted
  ggtitle("Shotmap",
          "Shotmap of Liverpool")
)

14 What were the outcome of the shots made by Liverpool?

liverpoolShot <- matchEvents %>% #Cant show 3d
  filter(type.name == "Shot" & team.name == "Liverpool") 
ggplotly(
soccerPitch(lengthPitch=120,widthPitch=80,arrow = "r",theme="grass",
            title = "", 
            subtitle = "shot map") +
  geom_segment(data = liverpoolShot, aes(x = location.x, xend = shot.end_location.x, y = location.y, yend = shot.end_location.y, col = shot.outcome.name)) +
  geom_point(data = liverpoolShot, aes(x = location.x, y = location.y, col = shot.outcome.name), size=4) +
  scale_y_reverse()
)

15 At what part of the field did Tottenham players make their shots?

tottShots <- matchEvents %>% filter(team.name == "Tottenham Hotspur", type.name == 
    "Shot")
ggplotly(soccerPitch(lengthPitch = 120, widthPitch = 80, arrow = "r", theme = "grass", 
    title = "", subtitle = "") + geom_point(data = tottShots, aes(x = location.x, 
    y = location.y, fill = shot.outcome.name), size = 4, shape = 21) + scale_y_reverse() + 
    ggtitle("Shotmap", "Shotmap of Tottenham"))

16 What were the outcome of the shots made by Liverpool?

tottenhamShot <- matchEvents %>% filter(type.name == "Shot" & team.name == "Tottenham Hotspur")
ggplotly(soccerPitch(lengthPitch = 120, widthPitch = 80, arrow = "r", theme = "grass", 
    title = "", subtitle = "Shot map") + geom_segment(data = tottenhamShot, aes(x = location.x, 
    xend = shot.end_location.x, y = location.y, yend = shot.end_location.y, col = shot.outcome.name)) + 
    geom_point(data = tottenhamShot, aes(x = location.x, y = location.y, col = shot.outcome.name), 
        size = 4) + scale_y_reverse())

17 What was the pass distribution for Liverpool? (Gallagher 2021)

matchEvents %>% #This excludes pass outcomes of types "Injury Clearance" and "Unknown"
  filter(team.name == "Liverpool",type.name=="Pass") %>%
  soccerPassmap(fill = "lightblue", arrow = "r",theme="grass",
                title = "",lengthPitch=120,widthPitch=80) +
  scale_y_reverse()

18 What was the pass distribution for Tottenham?

matchEvents %>% #This excludes pass outcomes of types "Injury Clearance" and "Unknown"
  filter(team.name == "Tottenham Hotspur",type.name=="Pass") %>%
  soccerPassmap(fill = "lightblue", arrow = "r",theme="grass",
                title = "",lengthPitch=120,widthPitch=80) +
  scale_y_reverse()

19 What was the pass distribution per player? (Holtz n.d.b)

 visualizePassesPerPlayer<- function(player_name){ # function to visualize passes per player
   links<-  matchEvents %>% # Dataframe to store player name, recipients of passes and number of passes made.
     filter(type.name=="Pass",player.name == player_name,is.na(pass.recipient.name)== F) %>%
     select(player.name,pass.recipient.name) %>%
     count(player.name,pass.recipient.name)
   
   # Dataframe to store all unique players, either a recipient of a pass or the person who made the pass
   nodes <- data.frame(
     name=c(as.character(links$player.name),
            as.character(links$pass.recipient.name)) %>% unique()
   )
   
   # The ID to be used to link the passer and the recipient of the pass
   links$IDsource <- match(links$player.name, nodes$name)-1 #returns the position of the player name in the dataframe that stores all players
   links$IDtarget <- match(links$pass.recipient.name, nodes$name)-1 #returns the position of the recipient name in the dataframe that stores all players
   
   # Create the Network
   passNetwork <- sankeyNetwork(Links = links, Nodes = nodes,
                      Source = "IDsource", Target = "IDtarget",
                      Value = "n", NodeID = "name", 
                      sinksRight=FALSE,
                      width= 900, height=600,
                      fontSize= 12, nodeWidth = 15)
   
   passNetwork <- prependContent(passNetwork, tags$h3("Total Passes Made to each Player")) # Add a title to the Network Diagram
   
   return(passNetwork)
 }
 
 visualizePassesPerPlayer("Alisson Ramsés Becker")

Total Passes Made to each Player

20 How many shots did each of the players have?

# Nmber of shots each player had
ggplotly(matchEvents %>% filter(type.name == "Shot") %>% count(player.name, team.name) %>% 
    ggplot(aes(n, player.name, color = team.name)) + geom_point(size = 5) + labs(x = "Number of Shots", 
    title = "No of Shots Per Player", subtitle = "") + theme_minimal())

21 What areas did the players cover during the game?

# Function to show Touch map of each player
playerTouchMap <- function(playerName) {
    ggplotly(matchEvents %>% filter(player.name == playerName) %>% select(location.x, 
        location.y) %>% rename(x = location.x, y = location.y) %>% soccerPath(col = "red", 
        lengthPitch = 120, widthPitch = 80, arrow = "r", lwd = 0.5) + scale_y_reverse() + 
        labs(title = "Player's Touch Map", subtitle = ""))
}
playerTouchMap("Mohamed Salah")

22 Where were the Tottenham players most of the times?

# Average player position for Tottenham
tottAvgPos <- matchEvents %>% filter(team.name == "Tottenham Hotspur", is.na(player.name) == 
    F) %>% group_by(player.name) %>% summarise(x = mean(location.x, na.rm = T), y = mean(location.y, 
    na.rm = T))  #Compute the average(mean) X and Y coordinates for the players
soccerPitch(lengthPitch = 120, widthPitch = 80, arrow = "r", theme = "grass", title = "", 
    subtitle = "") + geom_point(data = tottAvgPos, aes(x = x, y = y), size = 4, shape = 21, 
    fill = "blue") + scale_y_reverse() + ggtitle("Average Positions", "Average Position Per Player") + 
    geom_label_repel(data = tottAvgPos, aes(x, y, label = str_split(player.name, 
        " ", simplify = T)[, 2]), fontface = "bold", size = 4)  #Add a label for the player names and avoid the names from overlapping across each other.

23 Where were the Liverpool players most of the times?

# Average player position for Liverpool
livAvgPos <- matchEvents %>% filter(team.name == "Liverpool", is.na(player.name) == 
    F) %>% group_by(player.name) %>% summarise(x = mean(location.x, na.rm = T), y = mean(location.y, 
    na.rm = T))  #Compute the average(mean) X and Y coordinates for the players

soccerPitch(lengthPitch = 120, widthPitch = 80, arrow = "r", title = "", subtitle = "") + 
    geom_point(data = livAvgPos, aes(x = x, y = y), size = 4, shape = 21, fill = "red") + 
    scale_y_reverse() + ggtitle("Average Positions", "Average Position Per Player") + 
    geom_label_repel(data = livAvgPos, aes(x, y, label = str_split(player.name, " ", 
        simplify = T)[, 2]), fontface = "bold", size = 4)  #Add a label for the player names and avoid the names from overlapping across each other.

24 Who was better Salah vs Kane? (Holtz n.d.a)

#Comparison of Salah and Kane
salVsKane<-matchEvents %>%
   count(player.name,type.name) %>%
   filter(player.name %in% c("Harry Kane","Mohamed Salah"),type.name %in% c("Ball Receipt*","Ball Recovery","Carry","Dispossessed","Duel","Pass","Pressure","Shot")) %>% # List of qualities to compare both players on
   arrange(player.name,type.name)
 
 
 data <- as.data.frame(matrix( salVsKane$n , ncol=8,byrow = T)) #turn the data into a matrix with each quality being a column
 colnames(data) <- c("Ball Receipt","Ball Recovery","Carry","Dispossessed","Duel","Pass","Pressure","Shot")
 rownames(data) <- c("Kane","Salah")
 
 # Include two new rows to the dataframe to specify the maximum and minimum value for the qualities being assessed. maximum of 60 and minimum of 0. Replicated 8 times across each column.
 data <- rbind(rep(60,8) , rep(0,8) , data)
 
 
 # Specify Colors
 colors_border=c( rgb(0.2,0.5,0.5,0.9), rgb(0.8,0.2,0.5,0.9) , rgb(0.7,0.5,0.1,0.9) )
 colors_in=c( rgb(0.2,0.5,0.5,0.4), rgb(0.8,0.2,0.5,0.4) , rgb(0.7,0.5,0.1,0.4) )
 
 # plot the radar chart
 radarchart(data, axistype=1 , 
            pcol=colors_border , pfcol=colors_in , plwd=4 , plty=1,
            cglcol="grey", cglty=1, axislabcol="grey", caxislabels=seq(0,60,15), cglwd=0.8,
            vlcex=0.8 )
 
 # Add a legend to the chart
 legend(x=1, y=1, legend = rownames(data[-c(1,2),]), bty = "n", pch=20 , col=colors_in , text.col = "grey", cex=1.2, pt.cex=3)

25 What substitutions were made during the game?

#Visualise Substitutions made
 # Function to visualize all the substitutions made in the game and the time it was made
 visualizeSubsMade<- function(){
   sub_links<-  matchEvents %>%
     filter(type.name=="Substitution",minute<=100) %>% #Include all substitutions on or before the 100th minute
     select(player.name,substitution.replacement.name,minute) 
   
   if (nrow(sub_links) > 0) { #Condition to return "No Substitution" if none were made before the time
     #All unique players involved in the substitutions made
     sub_nodes <- data.frame(
       name=c(as.character(sub_links$player.name), 
              as.character(sub_links$substitution.replacement.name)) %>% unique()
     )
     
     #ID to link the substituted player with the incoming player
     sub_links$IDsource <- match(sub_links$player.name, sub_nodes$name)-1 
     sub_links$IDtarget <- match(sub_links$substitution.replacement.name, sub_nodes$name)-1
     
     # Assign colors to show player's being substituted and their replacements
     color <- 'd3.scaleOrdinal() .domain(["Roberto Firmino Barbosa de Oliveira", "Georginio Wijnaldum","Harry Winks",
"Moussa Sissoko", "Bamidele Alli", "Sadio Mané", "Divock Okoth Origi", "James Philip Milner","Lucas Rodrigues Moura da Silva",
"Eric Dier","Fernando Llorente Torres","Joe Gomez"]) .range(["red", "red" , "red", "red", "red", "red", "green", "green", "green", "green"
, "green", "green"])'
     
     # Create the network
     subsMadeNetwork <- sankeyNetwork(Links = sub_links, Nodes = sub_nodes,
                        Source = "IDsource", Target = "IDtarget",
                        Value = "minute", NodeID = "name", 
                        sinksRight=F,
                        width= 900, height=600,
                        fontSize= 12, nodeWidth = 15,
                        colourScale=color)
     
     subsMadeNetwork <- prependContent(subsMadeNetwork, tags$h1("Substitutions made"))
     return(subsMadeNetwork)}
   else{
     print("No Substitution")
   } 
 }
 
 visualizeSubsMade()

Substitutions made

26 Appendix

knitr::opts_chunk$set(echo = TRUE,message=FALSE,warning=FALSE,error=FALSE,tidy=TRUE)
library(jsonlite)
library(tidyverse)
library(ggplot2)
library(rworldmap)
library(soccermatics)
library(dplyr)
library(fmsb)
library(plotly)
library(lubridate)
library(networkD3)
library(ggrepel)
library(htmltools)
library(htmlwidgets)
library(scales)
matchDetails <- fromJSON("ChampionsLeagueFinalMatch.json", flatten=TRUE) #Import json file and convert it to a dataframe
matchEvents <- fromJSON("ChampionsLeagueFinalEvents.json", flatten=TRUE) #Import json file and convert it to a dataframe
matchLineUp <- fromJSON("ChampionsLeagueFinalLineup.json", flatten=TRUE) #Import json file and convert it to a dataframe

#Converted England and Scotland to UK because the map package groups them together under UK
matchLineUp[[3]][[2]]["country.name"][matchLineUp[[3]][[2]]["country.name"]=="England"]="UK"
matchLineUp[[3]][[2]]["country.name"][matchLineUp[[3]][[2]]["country.name"]=="Scotland"]="UK"

#Converted England to UK and Korea (South) to south korea 
matchLineUp[[3]][[1]]["country.name"][matchLineUp[[3]][[1]]["country.name"]=="England"]="UK"
matchLineUp[[3]][[1]]["country.name"][matchLineUp[[3]][[1]]["country.id"]=="121"]="South Korea"

#Clean all location coordinates by splitting the X,Y,Z axis into separate fields.
#Event Location
for (i in 1:nrow(matchEvents)){
  if (is.numeric(matchEvents[[i,10]])){
    matchEvents$location.x[i]=matchEvents[[i,10]][1]
    matchEvents$location.y[i]=matchEvents[[i,10]][2]
  }
  else{
    matchEvents$location.x[i]=NA
    matchEvents$location.y[i]=NA
  }
}
#carry end location 
for (i in 1:nrow(matchEvents)){
  if (is.numeric(matchEvents[[i,56]])){
    matchEvents$carry.end_location.x[i]=matchEvents[[i,56]][1]
    matchEvents$carry.end_location.y[i]=matchEvents[[i,56]][2]
  }
  else{
    matchEvents$carry.end_location.x[i]=NA
    matchEvents$carry.end_location.y[i]=NA
  }
}
#pass end location 
for (i in 1:nrow(matchEvents)){
  if (is.numeric(matchEvents[[i,31]])){
    matchEvents$pass.end_location.x[i]=matchEvents[[i,31]][1]
    matchEvents$pass.end_location.y[i]=matchEvents[[i,31]][2]
  }
  else{
    matchEvents$pass.end_location.x[i]=NA
    matchEvents$pass.end_location.y[i]=NA
  }
}
#shot end location 
for (i in 1:nrow(matchEvents)){
  if (is.numeric(matchEvents[[i,69]])){
    matchEvents$shot.end_location.x[i]=matchEvents[[i,69]][1]
    matchEvents$shot.end_location.y[i]=matchEvents[[i,69]][2]
    matchEvents$shot.end_location.z[i]=matchEvents[[i,69]][3]
  }
  else{
    matchEvents$shot.end_location.x[i]=NA
    matchEvents$shot.end_location.y[i]=NA
    matchEvents$shot.end_location.z[i]=NA
  }
}

#Show a bubbble map of the player's nationality count
matchLineUp[[3]][[2]] %>% count(country.name) %>%
  joinCountryData2Map(joinCode = "NAME",
                      nameJoinColumn = "country.name") %>%
  mapBubbles( nameZSize="n" 
              , nameZColour="country.name"
              ,legendTitle ="No of Players"
              #,colourPalette='topo', #rainbow
              ,oceanCol='lightblue', landCol='white'
              ,colourPalette=adjustcolor(rwmGetColours("palette", numColours=7), alpha.f = 0.8)) 
title(main="Liverpool Players Nationality")   

par(mai=c(0.3,0.2,0.2,0),xaxs="i",yaxs="i")
#Show a bubbble map of the player's nationality count hignlighting just european countries
matchLineUp[[3]][[2]] %>% count(country.name) %>%
  joinCountryData2Map(joinCode = "NAME",
                      nameJoinColumn = "country.name") %>%
  mapBubbles( nameZSize="n"
              , nameZColour="country.name"
              ,mapRegion = "eurasia" #where I specified the map to show just european countries
             ,legendTitle ="No of Players"
              ,oceanCol='lightblue', landCol='white'
             ,colourPalette=adjustcolor(rwmGetColours("palette", numColours=7), alpha.f = 0.8)) 
title(main="Liverpool Players Nationality showing the European Countries") 
#Show a bubbble map of the player's nationality count
matchLineUp[[3]][[1]] %>% count(country.name) %>%
  joinCountryData2Map(joinCode = "NAME",
                      nameJoinColumn = "country.name") %>%
  mapBubbles( nameZSize="n"
              , nameZColour="country.name"
              ,legendTitle ="No of Players"
              #colourPalette='topo', #rainbow
              ,oceanCol='lightblue', landCol='white'
              ,colourPalette=adjustcolor(rwmGetColours("palette", numColours=7), alpha.f = 0.8) )
title(main="Tottenham Players Nationality ") 

par(mai=c(0.3,0.2,0.2,0),xaxs="i",yaxs="i")
#Show a bubbble map of the player's nationality count hignlighting just european countries
matchLineUp[[3]][[1]] %>% count(country.name) %>%
  joinCountryData2Map(joinCode = "NAME",
                      nameJoinColumn = "country.name") %>%
  mapBubbles( nameZSize="n"
              , nameZColour="country.name"
              ,mapRegion = "eurasia" #where I specified the map to show just european countries
              ,legendTitle ="No of Players"
              ,oceanCol='lightblue', landCol='white'
              ,colourPalette=adjustcolor(rwmGetColours("palette", numColours=7), alpha.f = 0.8)) 
title(main="Tottenham Players Nationality showing the European Countries") 
# show a barplot of all player's nationality
matchLineUp[[3]][[1]] %>%
  bind_rows(matchLineUp[[3]][[2]]) %>%
    count(country.name) %>%
      barplotCountryData(nameColumnToPlot="n" 
                    , nameCountryColumn = "country.name"
                    ,numPanels = 1
                    ,main = "Players Nationality Count in Final"
                    ,catMethod = "categorical"
                    ,colourPalette=adjustcolor(rwmGetColours("palette", numColours=5), alpha.f = 0.8)
)

 ggplotly(ggplot(matchEvents, aes(x=type.name,fill=type.name)) +
   geom_bar()+
   facet_wrap(~team.name) + # shows two output for each team
   theme(axis.text.x = element_text(angle=90)) + # rotates the label in the x axis
   labs(#subtitle="", 
     y="Number of events", 
     x="Event Name", 
     title="Bar Chart of match events per Team" 
   )
 )
 ggplotly(ggplot(matchEvents, aes(x=play_pattern.name,fill=play_pattern.name)) +
   geom_bar()+
   facet_wrap(~team.name) + # shows two output for each team
   theme(axis.text.x = element_text(angle=90)) + # rotates the label in the x axis
   labs(#subtitle="", 
     y="Number of events", 
     x="Play Pattern Name", 
     title="Bar Chart of Play pattern per Team" 
   )
 )
 ggplotly(
   matchEvents %>%
     filter(type.name=="Pass") %>%
     ggplot(aes(x=pass.height.name,fill=pass.body_part.name)) +
     geom_bar()+
     facet_wrap(~team.name) +
     theme(axis.text.x = element_text(angle=90)) + # rotates the label in the x axis
     labs(#subtitle="", 
       y="Number of events", 
       x="Pass Type", 
       title="Bar Chart of pass type by body part per team " 
     )
 )
 ggplotly(
   matchEvents %>%
     filter(type.name=="Shot") %>% # filter for only shots
     ggplot(aes(x=type.name,fill=shot.type.name)) +
     geom_bar()+
     facet_wrap(~team.name) +
     theme(axis.text.x = element_text(angle=90)) + # rotates the label in the x axis
     labs(#subtitle="", 
       y="Number of events", 
       x="Play Pattern Name", 
       title="Bar Chart of match events per each play pattern for each team" 
     )
 )
 ggplotly(
   matchEvents %>%
     filter(type.name=="Dribble") %>%
     ggplot(aes(x=type.name,fill=dribble.outcome.name)) +
     geom_bar()+
     facet_wrap(~team.name) +
     theme(axis.text.x = element_text(angle=90)) +
     labs(#subtitle="", 
       y="Number of Dribles", 
       x="Dribbles", 
       title="Bar Chart of Dribbles for each team" 
     )
 )
ggplotly(matchEvents %>%
  filter(period==1) %>% # filter for just the first half
  mutate(timestamp=parse_date_time(timestamp, orders = "HMS")) %>% #convert the timestamp field to a date_time class
  ggplot(aes(x = timestamp, y = possession_team.id)) +
  geom_line(color = "grey", 
            size=0.5 ) +
  scale_x_datetime(date_breaks = "5 min",labels = date_format("%H:%M:%S")) + # Use the hour,minute and second as the label for the x axis
  labs(title = "Ball Possession between both teams in the 1st Half",
       subtitle = "",
       x = "Time",
       y = "Teams") +
  theme_minimal() +
  scale_y_continuous(breaks = c(24,38), 
                     labels=c("Liverpool","Tottenham")) + 
  facet_wrap(~period) +
  theme(axis.text.x = element_text(angle=90)) 
)
ggplotly(matchEvents %>%
  filter(period==2) %>% # filter for just the first half
  mutate(timestamp=parse_date_time(timestamp, orders = "HMS")) %>% #convert the timestamp field to a date_time class
  ggplot(aes(x = timestamp, y = possession_team.id)) +
  geom_line(color = "grey", 
            size=0.5 ) +
  scale_x_datetime(date_breaks = "5 min",labels = date_format("%H:%M:%S")) + # Use the hour,minute and second as the label for the x axis
  labs(title = "Ball Possession between both teams in the 2nd Half",
       subtitle = "",
       x = "Time",
       y = "Teams") +
  theme_minimal() +
  scale_y_continuous(breaks = c(24,38), 
                     labels=c("Liverpool","Tottenham")) + 
  facet_wrap(~period) +
  theme(axis.text.x = element_text(angle=90)) 
)
liverpoolShots<-matchEvents %>%
  filter(team.name == "Liverpool",type.name=="Shot")
ggplotly(
soccerPitch(lengthPitch=120,widthPitch=80,arrow = "r",theme="grass",
            title = "", 
            subtitle = "") +
  geom_point(data = liverpoolShots, aes(x = location.x, y = location.y,fill=shot.outcome.name),size=4,shape = 21) + #where I included the coorinates for the shot 
  scale_y_reverse() + #reverses the y axis of the pitch as it was inverted
  ggtitle("Shotmap",
          "Shotmap of Liverpool")
)
liverpoolShot <- matchEvents %>% #Cant show 3d
  filter(type.name == "Shot" & team.name == "Liverpool") 
ggplotly(
soccerPitch(lengthPitch=120,widthPitch=80,arrow = "r",theme="grass",
            title = "", 
            subtitle = "shot map") +
  geom_segment(data = liverpoolShot, aes(x = location.x, xend = shot.end_location.x, y = location.y, yend = shot.end_location.y, col = shot.outcome.name)) +
  geom_point(data = liverpoolShot, aes(x = location.x, y = location.y, col = shot.outcome.name), size=4) +
  scale_y_reverse()
)
tottShots<-matchEvents %>%
  filter(team.name == "Tottenham Hotspur",type.name=="Shot")
ggplotly(
soccerPitch(lengthPitch=120,widthPitch=80,arrow = "r",theme="grass",
            title = "", 
            subtitle = "") +
  geom_point(data = tottShots, aes(x = location.x, y = location.y,fill=shot.outcome.name),size=4,shape = 21) +
  scale_y_reverse() +
  ggtitle("Shotmap",
          "Shotmap of Tottenham")
)
tottenhamShot <- matchEvents %>% 
  filter(type.name == "Shot" & team.name == "Tottenham Hotspur") 
ggplotly(
soccerPitch(lengthPitch=120,widthPitch=80,arrow = "r",theme="grass",
            title = "", 
            subtitle = "Shot map") +
  geom_segment(data = tottenhamShot, aes(x = location.x, xend = shot.end_location.x, y = location.y, yend = shot.end_location.y, col = shot.outcome.name)) +
  geom_point(data = tottenhamShot, aes(x = location.x, y = location.y, col = shot.outcome.name), size=4) +
  scale_y_reverse()
)

matchEvents %>% #This excludes pass outcomes of types "Injury Clearance" and "Unknown"
  filter(team.name == "Liverpool",type.name=="Pass") %>%
  soccerPassmap(fill = "lightblue", arrow = "r",theme="grass",
                title = "",lengthPitch=120,widthPitch=80) +
  scale_y_reverse()

matchEvents %>% #This excludes pass outcomes of types "Injury Clearance" and "Unknown"
  filter(team.name == "Tottenham Hotspur",type.name=="Pass") %>%
  soccerPassmap(fill = "lightblue", arrow = "r",theme="grass",
                title = "",lengthPitch=120,widthPitch=80) +
  scale_y_reverse()
 visualizePassesPerPlayer<- function(player_name){ # function to visualize passes per player
   links<-  matchEvents %>% # Dataframe to store player name, recipients of passes and number of passes made.
     filter(type.name=="Pass",player.name == player_name,is.na(pass.recipient.name)== F) %>%
     select(player.name,pass.recipient.name) %>%
     count(player.name,pass.recipient.name)
   
   # Dataframe to store all unique players, either a recipient of a pass or the person who made the pass
   nodes <- data.frame(
     name=c(as.character(links$player.name),
            as.character(links$pass.recipient.name)) %>% unique()
   )
   
   # The ID to be used to link the passer and the recipient of the pass
   links$IDsource <- match(links$player.name, nodes$name)-1 #returns the position of the player name in the dataframe that stores all players
   links$IDtarget <- match(links$pass.recipient.name, nodes$name)-1 #returns the position of the recipient name in the dataframe that stores all players
   
   # Create the Network
   passNetwork <- sankeyNetwork(Links = links, Nodes = nodes,
                      Source = "IDsource", Target = "IDtarget",
                      Value = "n", NodeID = "name", 
                      sinksRight=FALSE,
                      width= 900, height=600,
                      fontSize= 12, nodeWidth = 15)
   
   passNetwork <- prependContent(passNetwork, tags$h3("Total Passes Made to each Player")) # Add a title to the Network Diagram
   
   return(passNetwork)
 }
 
 visualizePassesPerPlayer("Alisson Ramsés Becker")
# Nmber of shots each player had
ggplotly(
matchEvents %>% 
  filter(type.name=="Shot") %>%
  count(player.name,team.name) %>%
  ggplot(aes(n, player.name,color=team.name)) +
  geom_point(size = 5) +
  labs(x = "Number of Shots",
       title = "No of Shots Per Player",
       subtitle = "") +
  theme_minimal() 
)
#Function to show Touch map of each player
playerTouchMap<- function(playerName){ggplotly(
matchEvents %>% 
  filter(player.name == playerName) %>%  
  select(location.x,location.y) %>%
  rename(x=location.x,y=location.y) %>%
soccerPath(col="red",lengthPitch = 120,widthPitch = 80,arrow = "r",lwd=0.5) +
scale_y_reverse()  +
  labs(title = "Player's Touch Map",
       subtitle = "") 
)
}
playerTouchMap("Mohamed Salah")
#Average player position for Tottenham
tottAvgPos<-matchEvents %>%
  filter(team.name == "Tottenham Hotspur",is.na(player.name)==F) %>%
  group_by(player.name) %>%
  summarise(x=mean(location.x,na.rm =T),y=mean(location.y,na.rm =T)) #Compute the average(mean) X and Y coordinates for the players
soccerPitch(lengthPitch=120,widthPitch=80,arrow = "r",theme="grass",
            title = "", 
            subtitle = "") +
  geom_point(data = tottAvgPos, aes(x = x, y = y),size=4,shape = 21,fill="blue") +
  scale_y_reverse() +
  ggtitle("Average Positions",
          "Average Position Per Player") +
  geom_label_repel(data=tottAvgPos,aes(x,y,label=str_split(player.name," ",simplify =T)[,2]),fontface = "bold",size=4) #Add a label for the player names and avoid the names from overlapping across each other.

#Average player position for Liverpool
livAvgPos<-matchEvents %>%
  filter(team.name == "Liverpool",is.na(player.name)==F) %>%
  group_by(player.name) %>%
  summarise(x=mean(location.x,na.rm =T),y=mean(location.y,na.rm =T)) #Compute the average(mean) X and Y coordinates for the players

soccerPitch(lengthPitch=120,widthPitch=80,arrow = "r",
            title = "", 
            subtitle = "") +
  geom_point(data = livAvgPos, aes(x = x, y = y),size=4,shape = 21,fill="red") +
  scale_y_reverse() +
  ggtitle("Average Positions",
          "Average Position Per Player") +
  geom_label_repel(data=livAvgPos,aes(x,y,label=str_split(player.name," ",simplify =T)[,2]),fontface = "bold",size=4) #Add a label for the player names and avoid the names from overlapping across each other.
#Comparison of Salah and Kane
salVsKane<-matchEvents %>%
   count(player.name,type.name) %>%
   filter(player.name %in% c("Harry Kane","Mohamed Salah"),type.name %in% c("Ball Receipt*","Ball Recovery","Carry","Dispossessed","Duel","Pass","Pressure","Shot")) %>% # List of qualities to compare both players on
   arrange(player.name,type.name)
 
 
 data <- as.data.frame(matrix( salVsKane$n , ncol=8,byrow = T)) #turn the data into a matrix with each quality being a column
 colnames(data) <- c("Ball Receipt","Ball Recovery","Carry","Dispossessed","Duel","Pass","Pressure","Shot")
 rownames(data) <- c("Kane","Salah")
 
 # Include two new rows to the dataframe to specify the maximum and minimum value for the qualities being assessed. maximum of 60 and minimum of 0. Replicated 8 times across each column.
 data <- rbind(rep(60,8) , rep(0,8) , data)
 
 
 # Specify Colors
 colors_border=c( rgb(0.2,0.5,0.5,0.9), rgb(0.8,0.2,0.5,0.9) , rgb(0.7,0.5,0.1,0.9) )
 colors_in=c( rgb(0.2,0.5,0.5,0.4), rgb(0.8,0.2,0.5,0.4) , rgb(0.7,0.5,0.1,0.4) )
 
 # plot the radar chart
 radarchart(data, axistype=1 , 
            pcol=colors_border , pfcol=colors_in , plwd=4 , plty=1,
            cglcol="grey", cglty=1, axislabcol="grey", caxislabels=seq(0,60,15), cglwd=0.8,
            vlcex=0.8 )
 
 # Add a legend to the chart
 legend(x=1, y=1, legend = rownames(data[-c(1,2),]), bty = "n", pch=20 , col=colors_in , text.col = "grey", cex=1.2, pt.cex=3)
 
#Visualise Substitutions made
 # Function to visualize all the substitutions made in the game and the time it was made
 visualizeSubsMade<- function(){
   sub_links<-  matchEvents %>%
     filter(type.name=="Substitution",minute<=100) %>% #Include all substitutions on or before the 100th minute
     select(player.name,substitution.replacement.name,minute) 
   
   if (nrow(sub_links) > 0) { #Condition to return "No Substitution" if none were made before the time
     #All unique players involved in the substitutions made
     sub_nodes <- data.frame(
       name=c(as.character(sub_links$player.name), 
              as.character(sub_links$substitution.replacement.name)) %>% unique()
     )
     
     #ID to link the substituted player with the incoming player
     sub_links$IDsource <- match(sub_links$player.name, sub_nodes$name)-1 
     sub_links$IDtarget <- match(sub_links$substitution.replacement.name, sub_nodes$name)-1
     
     # Assign colors to show player's being substituted and their replacements
     color <- 'd3.scaleOrdinal() .domain(["Roberto Firmino Barbosa de Oliveira", "Georginio Wijnaldum","Harry Winks",
"Moussa Sissoko", "Bamidele Alli", "Sadio Mané", "Divock Okoth Origi", "James Philip Milner","Lucas Rodrigues Moura da Silva",
"Eric Dier","Fernando Llorente Torres","Joe Gomez"]) .range(["red", "red" , "red", "red", "red", "red", "green", "green", "green", "green"
, "green", "green"])'
     
     # Create the network
     subsMadeNetwork <- sankeyNetwork(Links = sub_links, Nodes = sub_nodes,
                        Source = "IDsource", Target = "IDtarget",
                        Value = "minute", NodeID = "name", 
                        sinksRight=F,
                        width= 900, height=600,
                        fontSize= 12, nodeWidth = 15,
                        colourScale=color)
     
     subsMadeNetwork <- prependContent(subsMadeNetwork, tags$h1("Substitutions made"))
     return(subsMadeNetwork)}
   else{
     print("No Substitution")
   } 
 }
 
 visualizeSubsMade()

References

“F1 Data Analysis.” n.d. Accessed March 5, 2021. https://kaggle.com/jonathanbouchet/f1-data-analysis.

Gallagher, Joe. 2021. “JoGall/Soccermatics.” https://github.com/JoGall/soccermatics.

Holtz, Yan. n.d.a. “Basic Radar Chart.” Accessed March 5, 2021. https://www.r-graph-gallery.com/142-basic-radar-chart.html.

———. n.d.b. “Most Basic Sankey Diagram.” Accessed March 5, 2021. https://www.r-graph-gallery.com/321-introduction-to-interactive-sankey-diagram-2.html.

Kabacoff, Rob. n.d. Data Visualization with R. Accessed March 5, 2021. https://rkabacoff.github.io/datavis/.

“Rworldmap Package R Documentation.” n.d. Accessed March 5, 2021. https://www.rdocumentation.org/packages/rworldmap/versions/1.3-6.

“Statsbomb/Open-Data.” 2021. StatsBomb. https://github.com/statsbomb/open-data.